MMSegmentation for Remote Sensing
A tutorial on using MMSegmentation for land-cover classification tasks in remote sensing:
- Install MMSegmentation
- Run inference with MMSeg pre-trained weights
- Train a semantic segmentation model on a new dataset
In this tutorial, we demonstrate:
- How to run inference with MMSeg pre-trained weights
- How to train on your own dataset and visualize the results.
# Check nvcc version
!nvcc -V
# Check GCC version
!gcc --version
# # Install PyTorch
# !pip install -U torch==1.5.0+cu101 torchvision==0.6.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
# # Install MMCV
# !pip install mmcv-full==latest+torch1.5.0+cu101 -f https://download.openmmlab.com/mmcv/dist/index.html
# !rm -rf mmsegmentation
# !git clone https://github.com/open-mmlab/mmsegmentation.git
# %cd mmsegmentation
# !pip install -e .
# Check PyTorch installation
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())
# Check MMSegmentation installation
import mmseg
print(mmseg.__version__)
# !mkdir checkpoints
# !wget https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth -P checkpoints
from mmseg.apis import inference_segmentor, init_segmentor, show_result_pyplot
from mmseg.core.evaluation import get_palette
config_file = '/home/ubuntu/sharedData/swp/dlLabSwp/favourite/swpFastTest/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py'
checkpoint_file = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'
# build the model from a config file and a checkpoint file
model = init_segmentor(config_file, checkpoint_file, device='cuda:0')
# test a single image
img = '/home/ubuntu/sharedData/swp/dlLabSwp/favourite/swpFastTest/mmsegmentation/demo/demo.png'
result = inference_segmentor(model, img)
# show the results
show_result_pyplot(model, img, result, get_palette('cityscapes'))
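inference_segmentor returns a list with one segmentation map per input image; each map is a 2-D array of per-pixel class indices. A minimal sketch for inspecting the result from the call above:
import numpy as np
seg = result[0]  # (H, W) array of class indices
print(seg.shape, np.unique(seg))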
Add a new dataset
Datasets in MMSegmentation require images and semantic segmentation maps to be placed in folders, with each image/annotation pair sharing the same filename prefix. To support a new dataset, we may need to modify the original file structure.
In this tutorial, we give an example of converting the dataset. You may refer to docs for details about dataset reorganization.
We use the Stanford Background Dataset as an example. The dataset contains 715 images chosen from the existing public datasets LabelMe, MSRC, PASCAL VOC, and Geometric Context. Images from these datasets are mainly outdoor scenes, each approximately 320-by-240 pixels. In this tutorial, we use the region annotations as labels. There are 8 classes in total: sky, tree, road, grass, water, building, mountain, and foreground object.
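For reference, a sketch of the folder layout MMSegmentation expects, using the directory names this tutorial configures below:
# iccv09Data/
# ├── images/   # RGB images, e.g. 6000124.jpg
# ├── labels/   # annotation maps sharing the image's filename stem, e.g. 6000124.png
# └── splits/   # train.txt / val.txt listing the file stems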
# download and unzip
# !wget http://dags.stanford.edu/data/iccv09Data.tar.gz -O standford_background.tar.gz
# !tar xf standford_background.tar.gz
# Let's take a look at the dataset
import mmcv
import matplotlib.pyplot as plt
from fastcore.basics import *
from fastai.vision.all import *
from fastai.torch_basics import *
import warnings
warnings.filterwarnings("ignore")
import kornia
from kornia.constants import Resample
from kornia.color import *
from kornia import augmentation as K
import kornia.augmentation as F
import kornia.augmentation.random_generator as rg
from torchvision.transforms import functional as tvF
from torchvision.transforms import transforms
from torchvision.transforms import PILToTensor
import numpy as np
set_seed(105)
train_a_path = Path("/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/rsData/kaggleOriginal/Potsdam/2_Ortho_RGB/")
label_a_path = Path("/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/rsData/kaggleOriginal/Potsdam/5_labels_for_participants/")
dsm_path = Path("/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/rsData/kaggleOriginal/Potsdam/1_dsm/1_DSM/")
ndsm_path = Path("/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/rsData/kaggleOriginal/Potsdam/1_dsm_normalisation/1_DSM_normalisation/")
imgNames = get_image_files(train_a_path)
lblNames = get_image_files(label_a_path)
dsmNames = get_image_files(dsm_path)
# pin the first entry of each list to a known Potsdam tile so the examples below are deterministic
imgNames[0]= Path('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/rsData/kaggleOriginal/Potsdam/2_Ortho_RGB/top_potsdam_2_11_RGB.tif')
lblNames[0]= Path('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/rsData/kaggleOriginal/Potsdam/5_labels_for_participants/top_potsdam_2_11_label.tif')
dsmNames[0]=Path('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/rsData/kaggleOriginal/Potsdam/1_dsm/1_DSM/dsm_potsdam_02_11.tif')
img = mmcv.imread(imgNames[0],channel_order='rgb')
plt.figure(figsize=(8, 8))
plt.imshow(img)
plt.axis('off')
plt.show()
torch.cuda.empty_cache()
!nvidia-smi
Let's take a look at the annotations.
to_tensor = transforms.ToTensor()
to_pil = transforms.ToPILImage()
rgbImage = Image.open(imgNames[0])
lblImage = Image.open(lblNames[0])
dsmImage = Image.open(dsmNames[0])
rgbTensor = image2tensor(rgbImage)
lblTensor = image2tensor(lblImage)
dsmTensor = image2tensor(dsmImage)
type(lblTensor)
rgbTensor.shape
lblTensor.shape
dsmTensor.shape
torch.unique(lblTensor)
lblTensor.shape
# pay attention to the dimension ordering; different packages yield different layouts:
# fastai's image2tensor (from PIL): (C, H, W)
# skimage: (H, W, C)
rgbArray = to_np(rgbTensor).transpose(1,2,0)
lblArray = to_np(lblTensor).transpose(1,2,0)
dsmArray = to_np(dsmTensor).transpose(1,2,0)
np.unique(lblArray)
The label image only contains the values 0 and 255 per channel (the palette colors below); we need to convert it to a grayscale image whose pixel values are constant class indices.
rgbArray.shape
type(rgbArray)
palette = {0: (255, 255, 255),  # Impervious surfaces (white)
           1: (0, 0, 255),      # Buildings (blue)
           2: (0, 255, 255),    # Low vegetation (cyan)
           3: (0, 255, 0),      # Trees (green)
           4: (255, 255, 0),    # Cars (yellow)
           5: (255, 0, 0),      # Clutter (red)
           6: (0, 0, 0)}        # Undefined (black)
invert_palette = {v: k for k, v in palette.items()}
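For example, the inverse mapping looks up the class index for a given color:
invert_palette[(0, 0, 255)]  # -> 1, i.e. Buildings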
def convert_to_color(arr_2d, palette=palette):
    """Numeric labels to RGB-color encoding."""
    arr_3d = np.zeros((arr_2d.shape[0], arr_2d.shape[1], 3), dtype=np.uint8)
    for c, i in palette.items():
        m = arr_2d == c
        arr_3d[m] = i
    return arr_3d
# the original label is RGB; we need a grayscale label
def convert_from_color(arr_3d, palette=invert_palette):
    """RGB-color encoding to grayscale labels."""
    arr_2d = np.zeros((arr_3d.shape[0], arr_3d.shape[1]), dtype=np.uint8)
    for c, i in palette.items():
        m = np.all(arr_3d == np.array(c).reshape(1, 1, 3), axis=2)
        arr_2d[m] = i
    return arr_2d
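As a quick sanity check, the two helpers should round-trip exactly, assuming every pixel in the label uses one of the palette colors (true for the Potsdam labels):
roundTrip = convert_to_color(convert_from_color(lblArray))
assert np.array_equal(roundTrip, lblArray)  # every color maps back unchanged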
np.unique(convert_from_color(lblArray))
transformedArray = convert_from_color(lblArray)
show_image(lblImage)
show_image(transformedArray,cmap='gray')
type(transformedArray)
Now turn the array into a grayscale image of palette mode ("P"):
paletteValue = list(palette.values())
paletteValue
temp = Image.fromarray(transformedArray).convert('P')
temp.putpalette(np.array(paletteValue, dtype=np.uint8))
# same as the array shown above
temp
type(temp)
np.unique(temp)
paletteValue
len(lblNames)
lblNames[0].parent
lblNames[0]
# note: fastai patches a .shape property onto PIL images (plain PIL only has .size)
print(f'label shape using PIL to read is {lblImage.shape}')
from skimage import io
print(f'label image shape using skimage to read is {io.imread(lblNames[0]).shape}')
# note the difference: PIL's patched .shape reports (H, W) = (6000, 6000),
# while skimage's io.imread returns an (H, W, C) array of shape (6000, 6000, 3)
temp = np.asarray(convert_from_color(io.imread(lblNames[0])),dtype='int64')
print(f'transformed label shape is {temp.shape}')
print(f'label has {np.unique(temp)} grayscale values')
type(temp)
temp
temp.shape
convert_to_color(temp).shape
show_image(convert_to_color(temp))
tempImage = Image.fromarray(np.uint8(temp)).convert('P')
tempImage
tempImage.putpalette(np.array(paletteValue, dtype=np.uint8))
tempImage
The shapes differ, and this determines how we convert the RGB label to the grayscale label.
# turn the images in the annotations to grayscale
classes = ['Impervious surface','Buildings','Low vegetation','Trees','Cars','Clutter','Background']
def turnDataset2Gray():
    # convert every RGB label to a palette-mode grayscale image;
    # note: this overwrites the original label files in place
    for index in range(len(lblNames)):
        lblImage = Image.open(lblNames[index])
        lblTensor = image2tensor(lblImage)
        lblArray = to_np(lblTensor).transpose(1, 2, 0)
        transformedArray = convert_from_color(lblArray)
        temp = Image.fromarray(transformedArray).convert('P')
        temp.putpalette(np.array(paletteValue, dtype=np.uint8))
        temp.save(lblNames[index].parent/f'{lblNames[index].stem}.tif')
        print(f'{lblNames[index].stem} saved')
# turnDataset2Gray()
import os.path as osp
import numpy as np
from PIL import Image
# convert dataset annotation to semantic segmentation map
data_root = 'iccv09Data'
img_dir = 'images'
ann_dir = 'labels'
osp.join(data_root,ann_dir)
classes
# Let's take a look at the segmentation map we got
import matplotlib.patches as mpatches
img = Image.open(lblNames[0])
plt.figure(figsize=(8, 6))
img
# im = plt.imshow(convert_to_color(np.array(img)))
img.shape
np.unique(np.array(img))
np.unique(convert_to_color(np.array(img)))
test = convert_to_color(np.array(img))
test.shape
show_image(test)
convert_to_color(np.array(img))
# create a patch (proxy artist) for every color
patches = [mpatches.Patch(color=np.array(paletteValue[i])/255.,
label=classes[i]) for i in range(7)]
# put those patched as legend-handles into the legend
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,
fontsize='large')
plt.axis('off')
plt.show()
# split train/val set (first 4/5 train, last 1/5 val)
split_dir = 'splits'
mmcv.mkdir_or_exist(osp.join(data_root, split_dir))
filename_list = [osp.splitext(filename)[0] for filename in mmcv.scandir(
osp.join(data_root, ann_dir), suffix='.png')]
len(filename_list)
with open(osp.join(data_root, split_dir, 'train.txt'), 'w') as f:
    # select first 4/5 as train set
    train_length = int(len(filename_list)*4/5)
    f.writelines(line + '\n' for line in filename_list[:train_length])
with open(osp.join(data_root, split_dir, 'val.txt'), 'w') as f:
    # select last 1/5 as val set
    f.writelines(line + '\n' for line in filename_list[train_length:])
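The split above is sequential, following the order returned by mmcv.scandir. For a genuinely random split, a minimal sketch using the standard library with a fixed seed (a hypothetical variant, not part of the pipeline above):
import random
random.seed(0)
shuffled = list(filename_list)
random.shuffle(shuffled)
train_length = int(len(shuffled) * 4 / 5)
with open(osp.join(data_root, split_dir, 'train.txt'), 'w') as f:
    f.writelines(line + '\n' for line in shuffled[:train_length])
with open(osp.join(data_root, split_dir, 'val.txt'), 'w') as f:
    f.writelines(line + '\n' for line in shuffled[train_length:])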
After downloading the data, we define and register a new dataset class, StandfordBackgroundDataset, by subclassing CustomDataset.
palette  # the Potsdam palette defined above: 7 classes (indices 0-6)
from mmseg.datasets.builder import DATASETS
from mmseg.datasets.custom import CustomDataset
@DATASETS.register_module()
class StandfordBackgroundDataset(CustomDataset):
    CLASSES = classes
    # MMSeg expects PALETTE as a list of (R, G, B) colors, not a dict
    PALETTE = list(palette.values())
    def __init__(self, split, **kwargs):
        super().__init__(img_suffix='.jpg', seg_map_suffix='.png',
                         split=split, **kwargs)
        assert osp.exists(self.img_dir) and self.split is not None
from mmcv import Config
cfg = Config.fromfile('../configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py')
Since the given config is used to train PSPNet on the Cityscapes dataset, we need to modify it accordingly for our new dataset.
from mmseg.apis import set_random_seed
# Since we use only one GPU, BN is used instead of SyncBN
cfg.norm_cfg = dict(type='BN', requires_grad=True)
cfg.model.backbone.norm_cfg = cfg.norm_cfg
cfg.model.decode_head.norm_cfg = cfg.norm_cfg
cfg.model.auxiliary_head.norm_cfg = cfg.norm_cfg
# modify num classes of the model in decode/auxiliary head
cfg.model.decode_head.num_classes = 8
cfg.model.auxiliary_head.num_classes = 8
# Modify dataset type and path
cfg.dataset_type = 'StandfordBackgroundDataset'
cfg.data_root = data_root
cfg.data.samples_per_gpu = 8
cfg.data.workers_per_gpu = 8
cfg.img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
cfg.crop_size = (256, 256)
cfg.train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=(320, 240), ratio_range=(0.5, 2.0)),
dict(type='RandomCrop', crop_size=cfg.crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **cfg.img_norm_cfg),
dict(type='Pad', size=cfg.crop_size, pad_val=0, seg_pad_val=255),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
cfg.test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(320, 240),
# img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **cfg.img_norm_cfg),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
cfg.data.train.type = cfg.dataset_type
cfg.data.train.data_root = cfg.data_root
cfg.data.train.img_dir = img_dir
cfg.data.train.ann_dir = ann_dir
cfg.data.train.pipeline = cfg.train_pipeline
cfg.data.train.split = 'splits/train.txt'
cfg.data.val.type = cfg.dataset_type
cfg.data.val.data_root = cfg.data_root
cfg.data.val.img_dir = img_dir
cfg.data.val.ann_dir = ann_dir
cfg.data.val.pipeline = cfg.test_pipeline
cfg.data.val.split = 'splits/val.txt'
cfg.data.test.type = cfg.dataset_type
cfg.data.test.data_root = cfg.data_root
cfg.data.test.img_dir = img_dir
cfg.data.test.ann_dir = ann_dir
cfg.data.test.pipeline = cfg.test_pipeline
cfg.data.test.split = 'splits/val.txt'
# We can still initialize from the PSPNet weights pre-trained on Cityscapes;
# layers whose shapes do not match (e.g. the classification heads) are skipped
cfg.load_from = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'
# Set up working dir to save files and logs.
cfg.work_dir = './work_dirs/tutorial'
cfg.runner.max_iters = 200
cfg.log_config.interval = 10
cfg.evaluation.interval = 200
cfg.checkpoint_config.interval = 200
# Set seed to facilitate reproducing the result
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)
# Let's have a look at the final config used for training
print(f'Config:\n{cfg.pretty_text}')
from mmseg.datasets import build_dataset
from mmseg.models import build_segmentor
from mmseg.apis import train_segmentor
# Build the dataset
datasets = [build_dataset(cfg.data.train)]
# Build the segmentor
model = build_segmentor(
cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))
# Add an attribute for visualization convenience
model.CLASSES = datasets[0].CLASSES
# Create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
train_segmentor(model, datasets, cfg, distributed=False, validate=True,
meta=dict())
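train_segmentor with validate=True already runs evaluation during training; to re-evaluate the trained model on the val split afterwards, a sketch assuming the mmseg 0.x APIs used throughout this tutorial:
from mmseg.datasets import build_dataloader
from mmseg.apis import single_gpu_test
from mmcv.parallel import MMDataParallel
val_dataset = build_dataset(cfg.data.val)
val_loader = build_dataloader(val_dataset, samples_per_gpu=1, workers_per_gpu=2,
                              dist=False, shuffle=False)
# single_gpu_test expects the model wrapped in MMDataParallel
outputs = single_gpu_test(MMDataParallel(model, device_ids=[0]), val_loader)
print(val_dataset.evaluate(outputs, metric='mIoU'))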
Inference with the trained model
img = mmcv.imread('iccv09Data/images/6000124.jpg')
model.cfg = cfg
result = inference_segmentor(model, img)
plt.figure(figsize=(8, 6))
show_result_pyplot(model, img, result, paletteValue)
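To save the overlay to disk instead of only displaying it, mmseg 0.x segmentors expose a show_result method with an out_file argument (a sketch; the output path is arbitrary):
model.show_result(img, result, palette=paletteValue,
                  out_file='./work_dirs/tutorial/result.jpg')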